This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
INSTALL NECESSARY PACKAGES:
# install.packages("erp.easy")
library(erp.easy)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
LOCATE FOLDERS:
# Locate the folder for the EEG output files (.txt) for old and new nets, replace the file location below with the one in your local device:
path_newnets <- "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/newnets/"
path_oldnets <- "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/oldnets/"
# Enter the number of participants in each group:
# (these counts are passed to erp.easy::load.data below, which expects one
# exported .txt file per subject per condition in the folders above)
subs_new <- 57
subs_old <- 11
LOAD DATA: * Load the data from all subjects into a dataframe for each of the 4 conditions. * Convert all variables into factors so that the grand average function can work properly. * Do this separately for old nets and new nets since we will process them differently based on electrode locations (electrode numbers are different). * Make sure all files have a 250 Hz sampling rate - downsample beforehand if needed. The code will not run if a child's data has a higher sampling rate.
# Load data into dataframes for each condition separately (the exported .txt files appear separately for each condition):
# load.data(path, condition, n.subjects, epoch.start, epoch.end) -- the -100
# and 999 arguments are presumably the epoch window in ms (100 ms pre-stimulus
# baseline to 999 ms post-stimulus); confirm against erp.easy::load.data docs.
neg_go <- load.data(path_newnets,"NegGo", subs_new, -100, 999)
neg_nogo <- load.data(path_newnets,"NegNoGo", subs_new, -100, 999)
neut_go <- load.data(path_newnets,"NeutGo", subs_new, -100, 999)
neut_nogo <- load.data(path_newnets,"NeutNoGo", subs_new, -100, 999)
# Combine all conditions together into a single dataframe:
combo_new <- rbind.data.frame(neg_go, neg_nogo, neut_go, neut_nogo)
# Convert character columns (Subject, Trial Type) to factors so the erp.easy
# grand-average functions work properly:
combo_new <- as.data.frame(unclass(combo_new), stringsAsFactors=TRUE)
# Repeat for old nets:
neg_go_old <- load.data(path_oldnets,"NegGo", subs_old, -100, 999)
neg_nogo_old <- load.data(path_oldnets,"NegNoGo", subs_old, -100, 999)
neut_go_old <- load.data(path_oldnets,"NeutGo", subs_old, -100, 999)
neut_nogo_old <- load.data(path_oldnets,"NeutNoGo", subs_old, -100, 999)
combo_old <- rbind.data.frame(neg_go_old, neg_nogo_old, neut_go_old, neut_nogo_old)
combo_old <- as.data.frame(unclass(combo_old),stringsAsFactors=TRUE)
SPECIFY THE ELECTRODE NUMBERS FOR P2, N2 and P3:
# We are only interested in frontal P2 (positive) and N2 (negative)
# Posterior P3 and Frontal P3
# Electrode columns are named V<number> in the loaded data; the two net types
# use different electrode numbering, hence one vector per net per component.
# NOTE(review): p3_oldnets lists 9 electrodes vs 8 in p3_newnets -- confirm
# this asymmetry is intentional (extra old-net electrode covering same region).
p2n2_newnets <- c("V4","V5","V10","V11","V12","V16","V18","V19")
p2n2_oldnets <- c("V4","V5","V10","V11","V12","V16","V19", "V20")
p3_newnets <- c("V54","V61","V62","V67","V72","V77","V78","V79")
p3_oldnets <- c("V54","V61","V62","V67","V68","V73","V78","V79","V80")
#Create average waveform plots for each subject in a single, multiplot window
# (visual QC of new-net data at the P2/N2 and P3 electrode clusters)
mosaic(combo_new, p2n2_newnets, cols = 3, rows = 2)
mosaic(combo_new, p3_newnets, cols = 3, rows = 2)
CODE BELOW GETS ALL THE MEASURES (N2, P2, P3) FROM OLD AND NEW NET DATA, COMBINES THEM TOGETHER, AND SAVES THE DATA INTO A FINAL COMBO SPREADSHEET:
# Get the measures from the NEW net:
# m.measures = mean amplitude (+ SD) in the given time window (ms);
# p.measures = peak amplitude/latency, with pol selecting positive or
# negative-going peaks. Windows: P2 150-300, N2 350-550, P3 450-750.
MeanAmp_P2_newnets <- (m.measures(combo_new, p2n2_newnets, window=c(150,300)))
MeanAmp_N2_newnets <- (m.measures(combo_new, p2n2_newnets, window=c(350,550)))
MeanAmp_P3_newnets <- (m.measures(combo_new, p3_newnets, window=c(450,750)))
Latency_P2_newnets <- (p.measures(combo_new, p2n2_newnets, window=c(150,300), pol="pos"))
Latency_N2_newnets <- (p.measures(combo_new, p2n2_newnets, window=c(350,550), pol="neg"))
Latency_P3_newnets <- (p.measures(combo_new, p3_newnets, window=c(450,750), pol="pos"))
# Combine all results together
# You can use full_join or merge in the same way:
# The suffix argument only fires on joins where column names collide, so the
# chain below leaves the P3 measure columns as the only unsuffixed ones;
# they get their ".P3" suffix in the dplyr::rename step right after.
total_new <- MeanAmp_P2_newnets %>%
full_join(MeanAmp_N2_newnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(MeanAmp_P3_newnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_P2_newnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_N2_newnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(Latency_P3_newnets, by = c("Subject", "Trial Type"))
# rename the variables without any suffix here:
# rename does not work properly unless you specify the package - some conflict:
total_new <- total_new %>% dplyr::rename("Mean Amplitude.P3" = "Mean Amplitude",
"Standard Dev.P3" = "Standard Dev",
"Peak Latency.P3" = "Peak Latency",
"Peak Amplitude.P3" = "Peak Amplitude")
# Get the measures from the OLD net:
# (same windows and join/suffix logic as the new-net block above, but using
# the old-net electrode vectors)
MeanAmp_P2_oldnets <- (m.measures(combo_old, p2n2_oldnets, window=c(150,300)))
MeanAmp_N2_oldnets <- (m.measures(combo_old, p2n2_oldnets, window=c(350,550)))
MeanAmp_P3_oldnets <- (m.measures(combo_old, p3_oldnets, window=c(450,750)))
Latency_P2_oldnets <- (p.measures(combo_old, p2n2_oldnets, window=c(150,300), pol="pos"))
Latency_N2_oldnets <- (p.measures(combo_old, p2n2_oldnets, window=c(350,550), pol="neg"))
Latency_P3_oldnets <- (p.measures(combo_old, p3_oldnets, window=c(450,750), pol="pos"))
# Combine all results together
# (suffixes fire only where names collide; P3 columns stay unsuffixed until
# the dplyr::rename right below)
total_old <- MeanAmp_P2_oldnets %>%
full_join(MeanAmp_N2_oldnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(MeanAmp_P3_oldnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_P2_oldnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_N2_oldnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(Latency_P3_oldnets, by = c("Subject", "Trial Type"))
# rename the variables without any suffix here:
total_old <- total_old %>% dplyr::rename("Mean Amplitude.P3" = "Mean Amplitude",
"Standard Dev.P3" = "Standard Dev",
"Peak Latency.P3" = "Peak Latency",
"Peak Amplitude.P3" = "Peak Amplitude")
# Combine old and new net data together:
# (no `by` argument, so full_join matches on ALL shared column names -- see
# the echoed "Joining, by = ..." message below. Because no subject appears in
# both nets, this effectively stacks the two tables row-wise.)
combo <- full_join(total_new, total_old)
## Joining, by = c("Subject", "Trial Type", "Standard Dev.P2", "Mean
## Amplitude.P2", "Standard Dev.N2", "Mean Amplitude.N2", "Standard Dev.P3", "Mean
## Amplitude.P3", "Peak Latency.P2", "Peak Amplitude.P2", "Peak Latency.N2", "Peak
## Amplitude.N2", "Peak Latency.P3", "Peak Amplitude.P3")
# Making sure we are only adding new rows - participants
# (row counts add up and column counts match => the join only stacked rows)
nrow(total_new) + nrow(total_old) == nrow(combo)
## [1] TRUE
ncol(total_old) == ncol(total_new)
## [1] TRUE
ncol(total_new) == ncol(combo)
## [1] TRUE
# Remove Grand Ave from data, order by subject name and reset the index:
combo <- combo[!(combo$Subject=="Grand Avg"),]
combo <- with(combo, combo[order(Subject) , ])
rownames(combo) <- NULL
# List remaining subject IDs (each subject has 4 rows, one per condition):
unique(combo[c("Subject")])
## Subject
## 1 AE050318
## 5 AH101121
## 9 AK022218
## 13 AK102221
## 17 AL041819
## 21 AL090917
## 25 AN122116
## 29 AO013020
## 33 AS110816
## 37 AT051818
## 41 AW040217
## 45 AW110418
## 49 BW071018T2
## 53 CC102318T2
## 57 CF101019
## 61 CL040218
## 65 CM120919
## 69 DJ052417
## 73 EC041817
## 77 EG030618
## 81 EM100417
## 85 ES031519
## 89 ES032018
## 93 FW121816
## 97 GB012817
## 101 GR071921
## 105 HC102117
## 109 HC111621
## 113 HH061919
## 117 JA092118
## 121 JG091119T3
## 125 JJ011018
## 129 JK032119T3
## 133 JS121321
## 137 JT051618
## 141 KA022017
## 145 KE050718
## 149 KT072319
## 153 LB012619
## 157 LG100721
## 161 LS100617
## 165 LW102219T3
## 169 MM040119
## 173 MR091118T2
## 177 MS102319
## 181 NL041119
## 185 OG013016
## 189 PB021519
## 193 PW030417
## 197 RB101619
## 201 RH100218
## 205 RK040219T3
## 209 RS030518
## 213 RT032219
## 217 SA072518
## 221 SB111121
## 225 SK041519
## 229 SL090418
## 233 SP010219
## 237 ST100121
## 241 TA051917
## 245 TE062818
## 249 TS011518
## 253 WB110221
## 257 WF080417
## 261 WH022219
## 265 WK011122
## 269 WS051018
# CREATE A NEW COLUMN by taking the difference N2 minus P2:
# N2 is the negative-going component, so these differences are typically
# negative; larger magnitude = bigger N2 relative to P2.
# N2P2avg uses mean amplitudes, N2P2peak uses peak amplitudes.
combo$`N2P2avg` <- combo$`Mean Amplitude.N2` - combo$`Mean Amplitude.P2`
combo$`N2P2peak` <- combo$`Peak Amplitude.N2` - combo$`Peak Amplitude.P2`
# Write to a csv file:
write.csv(combo, "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/combo.csv")
# Quick sanity check of the merged ERP table:
head(combo)
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318 NegGo 3.021852 -6.470334 3.907745
## 2 AE050318 NegNoGo 5.962168 3.531623 2.249580
## 3 AE050318 NeutGo 2.577527 -10.326151 3.458130
## 4 AE050318 NeutNoGo 5.419626 -10.376971 2.866578
## 5 AH101121 NegGo 2.182505 -7.668577 1.576681
## 6 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 -10.871784 1.377677 3.505749 220
## 2 2.023796 2.252998 3.432536 256
## 3 -15.687000 1.537678 4.367052 228
## 4 -7.857202 2.118532 7.358546 204
## 5 -24.930473 4.063984 18.547604 232
## 6 -24.144933 4.750914 14.173249 244
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 -2.7612018 536 -5.187617 648
## 2 11.4069294 452 -2.268911 596
## 3 -7.3050607 376 -20.651664 588
## 4 -4.5040871 452 -7.270952 720
## 5 -5.8547045 492 -26.713190 528
## 6 -0.5727539 476 -25.672664 508
## Peak Amplitude.P3 N2P2avg N2P2peak
## 1 5.088120 -4.401450 -2.426415
## 2 6.870649 -1.507826 -13.675841
## 3 7.176728 -5.360848 -13.346603
## 4 10.265959 2.519769 -2.766865
## 5 23.899982 -17.261896 -20.858486
## 6 18.808306 -20.115323 -25.099910
MERGE WITH INTAKE INFO: * Load data exported from RedCap, named: Intake_Stuttering_Language_Varbls_for_Zoo_and_Reactivity * Feature engineer necessary variables and merge the above ERP dataset with TalkerGroup, Gender, Age info along with Stuttering and Language Scores
# Load DataSet:
intake <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/CognitiveEmotionalLi-IntakeStutteringLang.csv')
# Subject IDs include the visit number in the combo dataset if it is NOT the first time point.
# Do the same here: Combine visit number with subject and create a new Subject variable so that it matches the combo:
# (e.g. part_id "BW071018" at visit 2 becomes "BW071018T2")
intake <- intake %>%
mutate(Subject = ifelse(visitnumber != 1, paste0(part_id_status, "T", visitnumber), part_id_status))
# Calculate the month difference using BIRTHDATE and CVE date to make sure the autocalculator is correct:
# Install and load the lubridate package
# install.packages("lubridate")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# the %/% operator is used to perform the floor division of the interval by months(1), which represents one month.
# NA results below correspond to rows with a missing birth date or CVE date.
intake$month_diff <- interval(intake$date_birth, intake$cve_date) %/% months(1)
print(intake$month_diff)
## [1] NA 38 NA 70 50 51 62 NA 51 NA NA NA 80 63 NA 60 60 NA 62 44 NA 48 77 NA NA
## [26] 45 NA 55 62 NA 61 NA 42 NA NA 79 49 NA 63 47 NA 78 78 52 NA NA 48 NA 74 NA
## [51] 55 70 53 52 NA 63 45 62 NA 48 49 42 NA 76 38 77 NA NA 62 78 39 NA 49 43 NA
## [76] 43 50 41 42 NA 75 49 NA 36 36 NA 50 NA 68 NA 42 47 NA 58 58 71 51 43 NA 50
## [101] NA 65 81 NA 40 44 NA NA 46 NA 53 NA 41 NA NA NA 54 76 50 NA 55 NA 55 75 NA
## [126] 62 51 53 NA 41 46 NA 44 NA NA 75 53 NA 38 NA 41 72 NA 43 52 NA 47 NA 52 NA
## [151] NA NA 75 45 NA 52 NA 61 NA 75 48 53 72 NA 42 53 NA 48 NA 56 77 NA NA 41 NA
print(round(intake$calculator_age_cve))
## [1] NA 38 NA 70 51 52 62 87 51 NA NA NA 80 64 NA 60 60 NA 63 44 NA 49 78 NA NA
## [26] 46 NA 56 62 NA 61 88 43 NA NA 79 50 NA 63 47 64 79 79 53 NA NA 49 NA 75 NA
## [51] 56 70 53 52 NA 64 45 62 NA 48 50 42 NA 76 38 77 NA NA 62 79 39 68 50 43 81
## [76] 43 50 42 42 NA 75 49 NA 36 37 62 50 NA 69 NA 42 48 NA 58 59 71 52 43 NA 50
## [101] 80 65 82 NA 40 44 NA NA 46 NA 54 NA 42 70 NA NA 54 77 50 NA 55 NA 56 76 NA
## [126] 62 52 54 NA 42 46 NA 45 73 NA 76 53 NA 38 64 41 73 NA 44 52 NA 48 73 52 NA
## [151] NA NA 75 46 NA 52 NA 61 NA 75 49 54 73 NA 42 54 NA 48 75 56 77 NA NA 42 67
# Not a big difference between the hand calculated and auto calculated field. Let's use "calculator_age_cve"
# Create a new variable representing final sldper100words ("disfluency_sldper100words_final") by taking disfluency_sldper100words from CVD as primary, but in the case that this data is missing, take the disfluency scores from CVE:
intake <- intake %>%
mutate(disfluency_sldper100words_final = ifelse(!is.na(disfluency_sldper100words), disfluency_sldper100words, disfluency_sldper100words_cve))
# Create a final talker group variable ("talkergroup_final") using disfluency_sldper100words_final and talker group based on parent report:
# 1: CWS, 0:CWNS, 9:unidentified
# NOTE(review): when the disfluency score and/or parent report are NA, the
# ifelse() conditions evaluate to NA and propagate NA instead of assigning 9,
# so "unidentified" children mostly end up as NA rather than 9. The manual
# labeling step further down depends on this (it tests is.na(talkergroup_final)),
# so do not change the NA behavior here without updating that step too.
intake <- intake %>%
mutate(talkergroup_final = ifelse((disfluency_sldper100words_final >= 3 | calculator_talkergroup_parent == 1), 1,
ifelse((disfluency_sldper100words_final < 3 & calculator_talkergroup_parent == 0), 0, 9)))
# ignoring the NA's:
# intake <- intake %>%
# mutate(talkergroup_overall_final = ifelse((!is.na(talkergroup_disfluency_final) & talkergroup_disfluency_final == 1) |
# (!is.na(calculator_talkergroup_parent) & calculator_talkergroup_parent == 1), 1,
# ifelse((!is.na(talkergroup_disfluency_final) & talkergroup_disfluency_final == &
# (!is.na(calculator_talkergroup_parent) & calculator_talkergroup_parent == 0), 0, 9)))
# Take the relevant columns from intake dataset
# You may update this to take more columns into the dataset:
intake <- subset(intake, select=c('Subject','calculator_age_cve','calculator_gender_cve',
'calculator_talkergroup_parent','tso_calculated', 'disfluency_sldper100words','ssi_total',
'disfluency_sldper100words_final', 'talkergroup_final',
"gfta_standard", "ppvt_standard", "evt_standard",
"teld_rec_standard","teld_exp_standard", "teld_spokenlang_standard",
'cve_comments','comments_tasks'))
# Merge with the main dataset using SUBJECT
# (default inner merge: keeps only subjects present in BOTH combo and intake)
FULL <- merge(combo, intake, by=c("Subject"))
head(FULL)
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318 NegNoGo 5.962168 3.531623 2.249580
## 2 AE050318 NeutNoGo 5.419626 -10.376971 2.866578
## 3 AE050318 NegGo 3.021852 -6.470334 3.907745
## 4 AE050318 NeutGo 2.577527 -10.326151 3.458130
## 5 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## 6 AH101121 NegGo 2.182505 -7.668577 1.576681
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 2.023796 2.252998 3.432536 256
## 2 -7.857202 2.118532 7.358546 204
## 3 -10.871784 1.377677 3.505749 220
## 4 -15.687000 1.537678 4.367052 228
## 5 -24.144933 4.750914 14.173249 244
## 6 -24.930473 4.063984 18.547604 232
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 11.4069294 452 -2.268911 596
## 2 -4.5040871 452 -7.270952 720
## 3 -2.7612018 536 -5.187617 648
## 4 -7.3050607 376 -20.651664 588
## 5 -0.5727539 476 -25.672664 508
## 6 -5.8547045 492 -26.713190 528
## Peak Amplitude.P3 N2P2avg N2P2peak calculator_age_cve
## 1 6.870649 -1.507826 -13.675841 38.1
## 2 10.265959 2.519769 -2.766865 38.1
## 3 5.088120 -4.401450 -2.426415 38.1
## 4 7.176728 -5.360848 -13.346603 38.1
## 5 18.808306 -20.115323 -25.099910 70.5
## 6 23.899982 -17.261896 -20.858486 70.5
## calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 1 0 1 1.9
## 2 0 1 1.9
## 3 0 1 1.9
## 4 0 1 1.9
## 5 1 0 NA
## 6 1 0 NA
## disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 1 12 23 12
## 2 12 23 12
## 3 12 23 12
## 4 12 23 12
## 5 0 6 0
## 6 0 6 0
## talkergroup_final gfta_standard ppvt_standard evt_standard teld_rec_standard
## 1 1 121 126 123 146
## 2 1 121 126 123 146
## 3 1 121 126 123 146
## 4 1 121 126 123 146
## 5 0 104 111 111 110
## 6 0 104 111 111 110
## teld_exp_standard teld_spokenlang_standard cve_comments
## 1 135 149
## 2 135 149
## 3 135 149
## 4 135 149
## 5 115 115
## 6 115 115
## comments_tasks
## 1
## 2
## 3
## 4
## 5 Good data, attentive child, but sleepy during the Reactivity task.
## 6 Good data, attentive child, but sleepy during the Reactivity task.
FIND THE UNDEFINED (9) TALKER GROUPS AND MANUALLY MARK THEM AS EITHER 1 or 0:
# Check the subject numbers with missing stuttering assessments:
# BUG FIX: the original condition also OR-ed in is.na(FULL$ssi_severity), but
# ssi_severity was never selected into FULL, so FULL$ssi_severity is NULL and
# is.na(NULL) is logical(0). OR-ing a logical vector with logical(0) collapses
# the whole mask to logical(0), which made this check silently return zero
# rows (`character(0)`) even though NAs clearly exist in the data.
rows_with_null <- FULL[is.na(FULL$disfluency_sldper100words) | is.na(FULL$ssi_total), ]
# | is.na(FULL_2$calculator_talkergroup_parent) makes no difference
unique(rows_with_null$Subject)
## character(0)
# Show the rows where talkergroup_final = 9 or NA :
# Renamed from `short_RT_rows` (copy/paste leftover) -- these rows have an
# undetermined talker group and have nothing to do with reaction time. The
# old name is freshly reassigned later in the behavioral-RT section before
# its first real use, so this rename is safe.
undetermined_talkergroup_rows <- subset(FULL, talkergroup_final == 9 | is.na(talkergroup_final))
undetermined_talkergroup_rows
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 145 LW102219T3 NegGo 4.047862 -11.3533653 1.226660
## 146 LW102219T3 NegNoGo 7.716179 -14.3407325 1.015503
## 147 LW102219T3 NeutGo 3.809460 -16.0253408 3.957634
## 148 LW102219T3 NeutNoGo 7.502369 -13.4832903 2.684751
## 193 SA072518 NegGo 1.044944 -1.8257751 1.919929
## 194 SA072518 NegNoGo 3.253022 -2.8618672 2.448918
## 195 SA072518 NeutGo 3.037777 -1.3041270 2.886428
## 196 SA072518 NeutNoGo 2.757350 -0.2626314 2.512955
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 145 -15.50399 0.9484819 0.7954588 160
## 146 -20.08492 0.9063114 2.8402350 160
## 147 -13.05854 2.4802956 -2.5216476 284
## 148 -16.74960 1.1207562 5.1640531 276
## 193 -10.16839 2.3822622 11.3534178 244
## 194 -18.03140 3.8740673 25.2926602 240
## 195 -8.41987 1.0071346 7.7124837 232
## 196 -10.71480 3.0017872 11.6209099 264
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 145 -6.9007481 488 -16.33392 488
## 146 -5.0478235 392 -20.30981 684
## 147 -21.1105926 352 -18.30229 456
## 148 -23.1688279 352 -22.34015 680
## 193 -0.2973903 516 -12.97347 612
## 194 3.1531559 444 -21.27522 676
## 195 2.5669589 444 -11.77583 492
## 196 5.1291300 440 -14.92553 568
## Peak Amplitude.P3 N2P2avg N2P2peak calculator_age_cve
## 145 2.524455 -4.150623 -9.4331692 79.8
## 146 4.480037 -5.744191 -15.2619911 79.8
## 147 2.975747 2.966805 2.8082985 79.8
## 148 6.883948 -3.266312 0.8286815 79.8
## 193 15.744950 -8.342619 -12.6760764 73.0
## 194 31.451418 -15.169536 -24.4283726 73.0
## 195 9.651857 -7.115743 -14.3427915 73.0
## 196 16.708875 -10.452173 -20.0546604 73.0
## calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 145 1 NA NA
## 146 1 NA NA
## 147 1 NA NA
## 148 1 NA NA
## 193 1 NA NA
## 194 1 NA NA
## 195 1 NA NA
## 196 1 NA NA
## disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 145 NA NA 0
## 146 NA NA 0
## 147 NA NA 0
## 148 NA NA 0
## 193 NA NA NA
## 194 NA NA NA
## 195 NA NA NA
## 196 NA NA NA
## talkergroup_final gfta_standard ppvt_standard evt_standard
## 145 NA NA NA NA
## 146 NA NA NA NA
## 147 NA NA NA NA
## 148 NA NA NA NA
## 193 NA NA NA NA
## 194 NA NA NA NA
## 195 NA NA NA NA
## 196 NA NA NA NA
## teld_rec_standard teld_exp_standard teld_spokenlang_standard
## 145 NA NA NA
## 146 NA NA NA
## 147 NA NA NA
## 148 NA NA NA
## 193 NA NA NA
## 194 NA NA NA
## 195 NA NA NA
## 196 NA NA NA
## cve_comments
## 145 Did fluency count abs CELF screener completed at visit.
## 146 Did fluency count abs CELF screener completed at visit.
## 147 Did fluency count abs CELF screener completed at visit.
## 148 Did fluency count abs CELF screener completed at visit.
## 193
## 194
## 195
## 196
## comments_tasks
## 145 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 146 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 147 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 148 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 193
## 194
## 195
## 196
# MANUALLY LABEL THE TALKERGROUP FOR NA's:
# LW102219T3 == 0 because there is a record of a disfluency count of 0, although no parent report available.
# SA072518 == 0, there is no indication in RedCap that the child stutters.
# Replace the remaining NA talkergroup_final values for exactly these two
# subjects with 0 (CWNS). %in% over both IDs does the same work as the two
# original per-subject ifelse() statements.
FULL$talkergroup_final <- ifelse(
  FULL$Subject %in% c("LW102219T3", "SA072518") & is.na(FULL$talkergroup_final),
  0,
  FULL$talkergroup_final
)
# Making sure no 9 or NA remained:
any(FULL$talkergroup_final == 9 | is.na(FULL$talkergroup_final))
## [1] FALSE
YOU CAN CREATE A NEW SUBSET WITH ONLY GOOD DATA HERE:
# Create a new subset with ONLY good data, by removing those participants you identified as having bad data:
# A single %in% membership test replaces the original chain of eight
# Subject != "..." conditions; the explicit !is.na() keeps filter()'s
# drop-NA-condition behavior identical to the original.
combo_good <- dplyr::filter(
  combo,
  !is.na(Subject) & !(Subject %in% c("AE050318", "AL090917", "ES031519",
                                     "LS100617", "LG100721", "MS102319",
                                     "PB021519", "RT032219"))
)
head(combo_good)
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AH101121 NegGo 2.182505 -7.668577 1.576681
## 2 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## 3 AH101121 NeutGo 1.839147 -4.235501 2.891194
## 4 AH101121 NeutNoGo 3.776016 0.651977 2.494542
## 5 AK022218 NegGo 4.825410 8.927914 5.937896
## 6 AK022218 NegNoGo 3.586909 5.862981 5.437614
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 -24.930473 4.063984 18.54760 232
## 2 -24.144933 4.750914 14.17325 244
## 3 -20.625652 4.391747 16.31334 224
## 4 -15.905247 5.252118 10.57876 236
## 5 -7.418664 1.763918 16.08891 204
## 6 -10.567050 6.287977 23.56859 204
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 -5.8547045 492 -26.71319 528
## 2 -0.5727539 476 -25.67266 508
## 3 -1.3806822 540 -23.88108 508
## 4 6.3521030 452 -19.63251 588
## 5 16.4295335 488 -14.04309 648
## 6 11.5147669 452 -19.36042 628
## Peak Amplitude.P3 N2P2avg N2P2peak
## 1 23.89998 -17.26190 -20.85849
## 2 18.80831 -20.11532 -25.09991
## 3 21.79867 -16.39015 -22.50040
## 4 15.80878 -16.55722 -25.98461
## 5 19.63726 -16.34658 -30.47263
## 6 36.67178 -16.43003 -30.87518
LOAD AND MERGE WITH BEHAVIORAL DATA: * For GO and NOGO conditions, use ShowStim.RESP to compute the accuracy. * ShowStim.RESP: 4 means the child pushed the button (accurate for Go); NA means no response (accurate for NoGo).
# Load the file:
accuracy <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/Merged_Zoo_05.18.23.csv')
# Take only the relevant variables:
accuracy <- subset(accuracy, select=c('Name','VisitNumber','ShowStim.ACC','ShowStim.RESP','ShowStim.RT','StimTag'))
# Check out the class types for each variable.
sapply(accuracy, class)
## Name VisitNumber ShowStim.ACC ShowStim.RESP ShowStim.RT
## "character" "integer" "integer" "character" "integer"
## StimTag
## "character"
# For ShowStim.RESP response 4 is a "character", not integer.
print(class(accuracy$ShowStim.RESP))
## [1] "character"
# Convert character 4 for ShowStim.RESP to integer
# accuracy$ShowStim.RESP <- as.integer(accuracy$ShowStim.RESP)
# Create a new ACCURACY column based on Go or NoGo conditions.
# ShowStim.RESP: 4 means the child pushed the button (accurate for Go), NA means no response (accurate for NoGo).
# 1 is accurate, 0 is inaccurate:
# The filter keeps only the four experimental StimTags. In the case_when,
# comparing an NA ShowStim.RESP with '4' yields NA (not TRUE), so no-response
# trials fall through to the "& TRUE" catch-all branch for their condition --
# which is exactly how NoGo no-responses score 1 and Go no-responses score 0.
accuracy <- accuracy %>%
filter((StimTag == 'negN' | StimTag == 'neuN') |
(StimTag == 'negG' | StimTag == 'neuG')) %>%
mutate(accuracy = case_when(
(StimTag == 'negN' | StimTag == 'neuN') & ShowStim.RESP == '4' ~ 0,
(StimTag == 'negN' | StimTag == 'neuN') & TRUE ~ 1,
(StimTag == 'negG' | StimTag == 'neuG') & ShowStim.RESP == '4' ~ 1,
(StimTag == 'negG' | StimTag == 'neuG') & TRUE ~ 0
))
####YOU MIGHT DECIDE TO REMOVE ALL TRIALS WITH RT SHORTER THAN 148MS FOR THE ACCURACY CALCULATION#####
# Display the short RT rows and how many of them there are:
# BUG FIX: logical subsetting with [ keeps rows where the condition is NA
# (ShowStim.RT is NA on no-response trials), injecting phantom all-NA rows
# into the result and inflating nrow() (587 reported vs 551 real rows in the
# original output). Wrapping the condition in which() drops the NA positions
# so only genuine short-RT trials remain.
short_RT_rows <- accuracy[which(accuracy$ShowStim.RT < 148 & accuracy$ShowStim.RT > 1), ]
head(short_RT_rows)
## Name VisitNumber ShowStim.ACC ShowStim.RESP ShowStim.RT StimTag
## 636 AO013020 1 0 4 117 neuN
## NA <NA> NA NA <NA> NA <NA>
## 2002 WF080417 1 0 4 59 neuN
## 2004 WF080417 1 1 4 75 neuG
## 2006 WF080417 1 0 4 136 neuN
## NA.1 <NA> NA NA <NA> NA <NA>
## accuracy
## 636 0
## NA NA
## 2002 0
## 2004 1
## 2006 0
## NA.1 NA
print(nrow(short_RT_rows))
## [1] 587
describe(short_RT_rows$StimTag)
## short_RT_rows$StimTag
## n missing distinct
## 551 36 4
##
## Value negG negN neuG neuN
## Frequency 209 93 185 64
## Proportion 0.379 0.169 0.336 0.116
# NOTE(review): the describe() output above lists 209 negG and 185 neuG
# short-RT trials, i.e. most short-RT rows ARE Go trials -- this contradicts
# the original conclusion here that all of them were NoGo trials and thus
# already scored incorrect. Since low RT is relevant precisely for Go trials,
# reconsider excluding short-RT Go trials (see the commented-out
# accuracy_filtered code below) before computing reaction_time.
# Create a subset of whole data set by excluding the very short RT trials.
# accuracy_filtered <- accuracy[!(accuracy$ShowStim.RT < 148 & accuracy$ShowStim.RT > 1), ]
# discrepancies <- accuracy$accuracy != accuracy$accuracy_filtered
# discrepancy_rows <- which(discrepancies)
# print(discrepancy_rows)
# create ACCURACY PERCENTAGE column
# (per subject x visit x condition; accuracy is 0/1, so the mean is % correct)
accuracy_percent <- accuracy %>%
group_by(Name, VisitNumber, StimTag) %>%
dplyr::summarize(accuracy_percentage = mean(accuracy) * 100)
## `summarise()` has grouped output by 'Name', 'VisitNumber'. You can override
## using the `.groups` argument.
# calculate REACTION TIME for GO only
# (correct Go trials only; RT is meaningless on NoGo or incorrect trials)
reaction_time <- accuracy %>%
filter(accuracy == 1 & (StimTag == 'negG' | StimTag == 'neuG')) %>%
group_by(Name, VisitNumber,StimTag) %>%
dplyr::summarize(reaction_time = mean(ShowStim.RT))
## `summarise()` has grouped output by 'Name', 'VisitNumber'. You can override
## using the `.groups` argument.
# COMBINE accuracy_percent and reaction_time
# (full join keeps NoGo rows, which simply get NA reaction_time)
eprime <- full_join(accuracy_percent, reaction_time, by=c("Name", "VisitNumber", "StimTag"))
# Combine visit number with subject and create a new Subject variable for eprime so that it matches the FULL
eprime <- eprime %>%
mutate(Subject = ifelse(VisitNumber != 1, paste0(Name, "T", VisitNumber), Name))
# Rename the labels for StimTags on eprime data to match the ERP condition names:
eprime <- eprime %>%
mutate(StimTag = recode(StimTag, "negG" = "NegGo", "negN" = "NegNoGo", "neuG" = "NeutGo", "neuN" = "NeutNoGo"))
# Drop name and Visitnumber from eprime
# Ungroup the dataframe first (select on a grouped tibble keeps group columns)
eprime <- ungroup(eprime)
eprime <- eprime %>%
dplyr::select(-Name, -VisitNumber) # eprime <- select(eprime, -Name, -VisitNumber)
# Replace Trial Type in FULL with "StimTag" to be able to merge with eprime data
FULL <- FULL %>%
dplyr::rename("StimTag" = "Trial Type")
# COMBINE ALL!!
# (default inner merge on Subject x StimTag)
ZOO <- merge(FULL, eprime, by=c("Subject", "StimTag"))
head(ZOO)
## Subject StimTag Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318 NegGo 3.021852 -6.470334 3.907745
## 2 AE050318 NegNoGo 5.962168 3.531623 2.249580
## 3 AE050318 NeutGo 2.577527 -10.326151 3.458130
## 4 AE050318 NeutNoGo 5.419626 -10.376971 2.866578
## 5 AH101121 NegGo 2.182505 -7.668577 1.576681
## 6 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 -10.871784 1.377677 3.505749 220
## 2 2.023796 2.252998 3.432536 256
## 3 -15.687000 1.537678 4.367052 228
## 4 -7.857202 2.118532 7.358546 204
## 5 -24.930473 4.063984 18.547604 232
## 6 -24.144933 4.750914 14.173249 244
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 -2.7612018 536 -5.187617 648
## 2 11.4069294 452 -2.268911 596
## 3 -7.3050607 376 -20.651664 588
## 4 -4.5040871 452 -7.270952 720
## 5 -5.8547045 492 -26.713190 528
## 6 -0.5727539 476 -25.672664 508
## Peak Amplitude.P3 N2P2avg N2P2peak calculator_age_cve
## 1 5.088120 -4.401450 -2.426415 38.1
## 2 6.870649 -1.507826 -13.675841 38.1
## 3 7.176728 -5.360848 -13.346603 38.1
## 4 10.265959 2.519769 -2.766865 38.1
## 5 23.899982 -17.261896 -20.858486 70.5
## 6 18.808306 -20.115323 -25.099910 70.5
## calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 1 0 1 1.9
## 2 0 1 1.9
## 3 0 1 1.9
## 4 0 1 1.9
## 5 1 0 NA
## 6 1 0 NA
## disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 1 12 23 12
## 2 12 23 12
## 3 12 23 12
## 4 12 23 12
## 5 0 6 0
## 6 0 6 0
## talkergroup_final gfta_standard ppvt_standard evt_standard teld_rec_standard
## 1 1 121 126 123 146
## 2 1 121 126 123 146
## 3 1 121 126 123 146
## 4 1 121 126 123 146
## 5 0 104 111 111 110
## 6 0 104 111 111 110
## teld_exp_standard teld_spokenlang_standard cve_comments
## 1 135 149
## 2 135 149
## 3 135 149
## 4 135 149
## 5 115 115
## 6 115 115
## comments_tasks
## 1
## 2
## 3
## 4
## 5 Good data, attentive child, but sleepy during the Reactivity task.
## 6 Good data, attentive child, but sleepy during the Reactivity task.
## accuracy_percentage reaction_time
## 1 84.16667 706.0297
## 2 57.50000 NA
## 3 90.83333 796.2385
## 4 72.50000 NA
## 5 99.16667 643.1176
## 6 87.50000 NA
# Persist the fully merged ERP + intake + behavioral dataset:
write.csv(ZOO, "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/ZOO.csv")
YOU CAN CREATE A NEW SUBSET WITH ONLY HIGHER BEHAVIORAL ACCURACY!
# ZOO <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/ZOO.csv')
# Keep condition-rows with behavioral accuracy strictly above 75%.
# NOTE(review): rows with accuracy_percentage exactly 75 (or NA) land in
# neither ZOO_good nor filtered_rows -- confirm that is intended.
ZOO_good <- subset(ZOO, accuracy_percentage > 75)
# Excluded rows kept for inspection (currently unused downstream):
filtered_rows <- ZOO[ZOO$accuracy_percentage < 75, ]
nrow(ZOO)
## [1] 232
nrow(ZOO_good)
## [1] 163
summary(ZOO$accuracy_percentage)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.00 72.50 88.84 83.38 95.83 100.00
LINE PLOTS - WRITE FUNCTION: * This function gives a summary bar graph by taking data and measurevar as input. * Data: the data frame, measurevar: The target dependent variable we are interested in.
library(Rmisc);
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
##
## is.discrete, summarize
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggplot2);
create_summary_plot <- function(data, measurevar,
                                groupvars = c("StimTag", "talkergroup_final")) {
  # Build a dodged bar chart of group means (+/- SE error bars) for one
  # dependent variable, grouped by groupvars[1] on the x-axis and
  # filled/faceted by groupvars[2]. The defaults reproduce the original
  # hard-coded behavior, so existing two-argument calls are unchanged.
  #
  # Args:
  #   data: data frame containing `measurevar` and both grouping columns.
  #   measurevar: string; name of the dependent-variable column.
  #   groupvars: length-2 character vector: c(x-axis column, facet/fill column).
  #
  # Returns: a ggplot object (caller prints it).
  #
  # Ensure character columns are factors so Rmisc::summarySE groups properly:
  data <- as.data.frame(unclass(data), stringsAsFactors = TRUE)
  # summarySE returns per-group N, mean (column named after measurevar), sd,
  # se and ci. Named `summary_stats` to avoid shadowing base::summary.
  summary_stats <- summarySE(data, measurevar = measurevar, groupvars = groupvars)
  # Convert the column-name strings to symbols so they can be unquoted with
  # !! inside the ggplot aesthetics (tidy evaluation):
  x_sym <- rlang::sym(groupvars[1])
  fill_sym <- rlang::sym(groupvars[2])
  y_sym <- rlang::sym(measurevar)
  plot <- ggplot(summary_stats, aes(x = !!x_sym, y = !!y_sym, fill = !!fill_sym)) +
    geom_bar(stat = "identity", color = "black", position = position_dodge()) +
    geom_errorbar(aes(ymin = !!y_sym - se, ymax = !!y_sym + se),
                  width = 0.2, position = position_dodge(.9)) +
    facet_wrap(vars(!!fill_sym)) +
    theme_classic() +
    # Dots in the column name (e.g. "Mean.Amplitude.P2") become spaces in the
    # y-axis label:
    labs(y = gsub("\\.", " ", measurevar), x = "\nCondition") +
    theme(axis.title.x = element_text(family = "Arial", color = "grey20", size = 12, angle = 0),
          axis.title.y = element_text(family = "Arial", color = "grey20", size = 12, angle = 90),
          strip.text.x = element_text(family = "Arial", color = "grey20", size = 10, angle = 0),
          legend.position = "none")
  # Return the generated plot
  return(plot)
}
## EXTRA GGPLOT FEATURES:
# scale_y_continuous(limits = c(-0.2, 0.8), breaks = c(-0.2, 0, 0.2, 0.4, 0.6, 0.8)) +
# ggsave("/Users/aysuerdemir/Desktop/R workspace/Myplots/boxplot7_fromdescriptivedata.png", dpi=300, units="in", height=6, width=8)
# theme <-
# legend.title = element_text(family="Arial", color = "grey20", size = 18, angle = 0, hjust = .5, vjust = .5, face = "plain"),
# legend.text = element_text(family="Arial", color = "grey20", size = 18, angle = 0, hjust = .5, vjust = .5, face = "plain"),
# legend.title.align = 0, legend.text.align = 0)
DRAW THE PLOTS USING THE FUNCTION ABOVE:
# Mean-amplitude summary plots per condition and talker group:
summary_plot_P2 <- create_summary_plot(data = ZOO, measurevar = "Mean.Amplitude.P2")
print(summary_plot_P2)
summary_plot_N2 <- create_summary_plot(data = ZOO, measurevar = "Mean.Amplitude.N2")
print(summary_plot_N2)
summary_plot_N2P2avg <- create_summary_plot(data = ZOO, measurevar = "N2P2avg")
print(summary_plot_N2P2avg)
summary_plot_N2P2peak <- create_summary_plot(data = ZOO, measurevar = "N2P2peak")
print(summary_plot_N2P2peak)
summary_plot_P3 <- create_summary_plot(data = ZOO, measurevar = "Mean.Amplitude.P3")
print(summary_plot_P3)
# The N2 latency plot is built inline (rather than via create_summary_plot)
# only so coord_cartesian() can zoom the y-axis to 300-500 ms:
ZOO <- as.data.frame(unclass(ZOO), stringsAsFactors = TRUE)
summary <- summarySE(ZOO, measurevar=c("Peak.Latency.N2"), groupvars=c("StimTag","talkergroup_final"))
ggplot(summary, aes(x=StimTag, y=Peak.Latency.N2, fill=talkergroup_final) ) +
geom_bar(stat="identity", color="black", position=position_dodge()) +
geom_errorbar(aes(ymin = Peak.Latency.N2-se, ymax = Peak.Latency.N2+se), width=0.2, position = position_dodge(.9)) +
facet_wrap(~talkergroup_final) +
theme_classic() + labs(y="Peak.Latency.N2\n", x = "\nCondition") +
theme(axis.title.x = element_text(family="Arial", color = "grey20", size = 12, angle = 0),
axis.title.y = element_text(family="Arial", color = "grey20", size = 12, angle = 90),
strip.text.x = element_text(family="Arial", color = "grey20", size = 10, angle = 0),
legend.position = "none") +
coord_cartesian(ylim = c(300, 500))
# Peak-latency summary plots:
summary_plot_P2_Latency <- create_summary_plot(data = ZOO, measurevar = "Peak.Latency.P2")
print(summary_plot_P2_Latency)
summary_plot_P3_Latency <- create_summary_plot(data = ZOO, measurevar = "Peak.Latency.P3")
print(summary_plot_P3_Latency)
# coord_cartesian(ylim = c(200, 650))
# Standard-deviation summary plots (trial-to-trial variability per component):
summary_plot_P2_StDev <- create_summary_plot(data = ZOO, measurevar = "Standard.Dev.P2")
print(summary_plot_P2_StDev)
summary_plot_N2_StDev <- create_summary_plot(data = ZOO, measurevar = "Standard.Dev.N2")
print(summary_plot_N2_StDev)
summary_plot_P3_StDev <- create_summary_plot(data = ZOO, measurevar = "Standard.Dev.P3")
print(summary_plot_P3_StDev)